
// A3_Create_dataset
//==============================================================================

// Description: This file creates the final dataset that is used for the estimation:
// - Removes outliers and creates the instrument
// - Merges in demographics
// - Creates product files that are later read into R

// Start from an empty session and load the formatted production/import data.
clear
set more off
cd "D:\data_replication"

use estimation\1_data_format\production_imports.dta, clear


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------
// An observation is kept only if its price is non-missing and lies strictly
// within a factor of 30 of the median price of its product (pc8plus), in
// either direction.

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_median = median(price_pc8plus)
// The ratios are stored as variables (not evaluated inline) so that the
// comparisons below are made on the stored values.
gen ratio_median = price_pc8plus / price_median
gen ratio_median_reversed = price_median / price_pc8plus
// A missing ratio (zero price or zero median) fails the "< 30" test, so such
// observations are dropped as well.
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_median ratio_median ratio_median_reversed


// Merge in partner/declarant covariates.
// Every merge is many-to-one and keeps matched observations only, so rows
// without a counterpart in the using file are removed at each step.

// Population of the partner (by year)
merge m:1 year partner using data\population\population_partner.dta, keep(match) nogenerate
drop partner_description
rename population population_partner


// GDP per capita of the partner (by year)
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta, keep(match) nogenerate
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance for each declarant-partner pair
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta, keep(match) nogenerate

// Developed-country dummy of the partner
merge m:1 partner using data\country_codes\developed_country.dta, keep(match) nogenerate

// Exchange rates of the partner (by year)
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta, keep(match) nogenerate

// Create Shares
// Totals are taken within each pc8plus-declarant-year-quarter cell; each
// observation's quantity and value are then expressed as a fraction of its
// cell total.
bysort pc8plus declarant year quarter: egen imports_quantity_pc8_total = total(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = total(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausman Instruments: Current Year
// For every observation, average the prices of all OTHER observations in the
// same pc8plus-year-partner group: once unweighted (price_ex_own) and once
// weighted by share (price_ex_own_weighted).
gen price_pc8_weighted = price_pc8plus * share

// Group totals and group size
bysort pc8plus year partner (declarant): egen price_sum = total(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted = total(price_pc8_weighted)
by pc8plus year partner: egen weight_sum = total(share)
by pc8plus year partner: gen number_importers = _N

// Remove the observation's own contribution from the totals
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted

// Leave-one-out averages; a zero denominator (e.g. a group with a single
// observation) yields a missing value
gen price_ex_own = price_sum_ex_own / (number_importers - 1)
gen price_ex_own_weighted = price_sum_ex_own_weighted / (weight_sum - share)

// Observations for which either instrument is undefined are removed
drop if missing(price_ex_own) | missing(price_ex_own_weighted)
drop price_pc8_weighted price_sum price_sum_weighted weight_sum number_importers price_sum_ex_own price_sum_ex_own_weighted


// Create Hausman Instruments: All years
// Same leave-one-out average as above, but the group is pc8plus-partner, i.e.
// prices are pooled over all years.
bysort pc8plus partner (declarant): egen price_sum = total(price_pc8plus)
by pc8plus partner: gen number_importers = _N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own / (number_importers - 1)
drop price_sum number_importers price_sum_ex_own
// Groups with a single observation have no "other" price and are removed
drop if missing(price_ex_own_all_years)

// Count the observations in each pc8plus-declarant-year-quarter cell and
// discard cells that contain only one observation.
bysort pc8plus declarant year quarter: gen products = _N
keep if products > 1

// Consecutive integer identifier for each remaining pc8plus code
egen product_id = group(pc8plus)

// Final row order, and column order with the estimation variables first
sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share ///
    price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years ///
    D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner ///
    distance developed_country population_partner

save estimation\1_data_format\data_base.dta, replace


// Write one .dta and one .csv file per product (these are later read into R).
// The number of products is taken from the saved dataset instead of being
// hard-coded: product_id comes from egen group(), which numbers the groups
// 1, 2, ..., K, so its maximum equals the number of products.
use estimation\1_data_format\data_base.dta, clear
quietly summarize product_id, meanonly
// With no observations r(max) is undefined; fall back to 0 so the loop is skipped
local n_products = cond(r(N) > 0, r(max), 0)

forvalues j = 1/`n_products' {
    // Reload the full dataset and keep only the current product
    use estimation\1_data_format\data_base.dta, clear
    keep if product_id == `j'
    keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products
    // Market identifier within the product: one id per year-quarter-declarant combination
    egen market_id = group(year quarter declarant)
    save estimation\1_data_format\product_files\data_product_`j'.dta, replace
    outsheet using estimation\1_data_format\product_files\data_product_`j'.csv, comma replace
}


